knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'Figs/',
                      echo = TRUE, message = FALSE, warning = FALSE)

library(tidyverse)
library(RColorBrewer)
library(stringr)

### Set up some options
options(stringsAsFactors = FALSE) ### Ensure strings come in as character types

### generic theme for all plots
ggtheme_plot <- function(base_size = 9) {
  theme(axis.ticks = element_blank(),
               text             = element_text(family = 'Helvetica', color = 'gray30', size = base_size),
               plot.title       = element_text(size = rel(1.25), hjust = 0, face = 'bold'),
               panel.background = element_blank(),
               legend.position  = 'right',
               panel.border     = element_blank(),
               panel.grid.minor = element_blank(),
               panel.grid.major = element_line(colour = 'grey90', size = .25),
               # panel.grid.major = element_blank(),
               legend.key       = element_rect(colour = NA, fill = NA),
               axis.line        = element_blank()) # element_line(colour = "grey30", size = .5))
}

1 Chesapeake Bay Project API

Main data access page: http://www.chesapeakebay.net/data/

API notes: http://data.chesapeakebay.net/API

1.1 Water Quality

  • Namespace
  • Programs WaterQuality/Programs
  • Projects WaterQuality/Projects
  • Geographical Attributes List
    • HUC8 datahub.chesapeakebay.net/api.json/HUC8
    • HUC12 datahub.chesapeakebay.net/api.csv/HUC12
    • FIPS datahub.chesapeakebay.net/api.csv/FIPS
      • “County/City (FIPS) - the Federal Information Processing System (FIPS) assigns 5-digit codes to all counties and incorporated cities in the United States. The first two digits correspond to the state and the last three to the county or incorporated city within that state.”
      • Get a FIPS list for the area around Chesapeake Bay
      • API doesn’t seem to work for FIPS…
    • CBSeg2003 datahub.chesapeakebay.net/api.json/CBSeg2003
    • SegmentShed2009 datahub.chesapeakebay.net/api.json/SegmentShed2009
    • Station datahub.chesapeakebay.net/api.json/Station

1.1.1 FIPS codes for Virginia, Maryland, Delaware, and DC

fips_df <- read_csv('https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt', 
              skip = 16, col_names = FALSE) %>%
  setNames('raw') %>%
  mutate(raw = str_trim(raw)) %>%
  separate(raw, into = c('fips_id', 'place_name'), sep = '[ ]{3,}') %>%
  mutate(fips_id = as.integer(fips_id)) %>%
  filter(!is.na(fips_id))

states_of_interest <- c('VIRGINIA', 'MARYLAND', 'DISTRICT OF COLUMBIA', 'DELAWARE')

fips_states <- fips_df %>%
  filter(fips_id < 100) %>%
  filter(place_name %in% states_of_interest) %>%
  rename(state = place_name, state_id = fips_id)

fips_chesapeake <- fips_df %>%
  filter(fips_id >= 1000) %>%
  mutate(state_id = floor(fips_id / 1000)) %>%
  filter(state_id %in% fips_states$state_id) %>%
  left_join(fips_states, by = 'state_id') %>%
  select(-state_id)

write_csv(fips_chesapeake, 'data/fips_chesapeake.csv')

DT::datatable(fips_chesapeake)
fips_check <- c('dc'               = 11001, 
                'fairfax county'   = 51059, 
                'fairfax city'     = 51600,
                'arlington county' = 51013)

# x <- read_csv('data/water_qual_download_large.csv') %>%
#   filter(FIPS %in% fips_check)
# 
# write_csv(x, 'data/water_qual_local.csv')

x <- read_csv('data/water_qual_local.csv')

# names(x)
#  [1] "FIPS"                "EventId"             "Cruise"              "Program"            
#  [5] "Project"             "Agency"              "Source"              "Station"            
#  [9] "SampleDate"          "SampleTime"          "TotalDepth"          "UpperPycnocline"    
# [13] "LowerPycnocline"     "Depth"               "Layer"               "SampleType"         
# [17] "SampleReplicateType" "Parameter"           "Qualifier"           "MeasureValue"       
# [21] "Unit"                "Method"              "Lab"                 "Problem"            
# [25] "PrecisionPC"         "BiasPC"              "Details"             "Latitude"           
# [29] "Longitude"

# x$Parameter %>% unique()

#  [1] "BOD5W"     "CHLA"      "CLW"       "DIN"       "DO"        "DOC"       "DON"      
#  [8] "DOP"       "FCOLI_M"   "FLOW_INS"  "HARDNESS"  "KD"        "NH4F"      "NO23F"    
# [15] "NO2F"      "NO3F"      "PC"        "PH"        "PHEO"      "PIP"       "PN"       
# [22] "PO4F"      "PP"        "SALINITY"  "SECCHI"    "SIF"       "SIGMA_T"   "SO4W"     
# [29] "SPCOND"    "SSC_%FINE" "SSC_TOTAL" "TALK"      "TCOLI_M"   "TDN"       "TDP"      
# [36] "TN"        "TON"       "TP"        "TSS"       "TURB_FNU"  "TURB_NTU"  "VSS"      
# [43] "WTEMP"     "FSS"       "PIC"       "POC"       "TDS"       "TKNW"      "TOC"      
# [50] "TURB_NTRU" "FCOLI_C"   "NH4W"      "NO23W"     "NO2W"      "NO3W"      "PO4W"     
# [57] "SSC_FINE"  "SSC_SAND" 

y <- x %>%
  group_by(Parameter) %>%
  filter(n() > 500) %>%
  ungroup()

y$Parameter %>% table()
## .
##     BOD5W      CHLA       DIN        DO       DON       DOP   FCOLI_M 
##      1481      3216       691      6037       633       609      1522 
##  HARDNESS      NH4F     NO23F      NO2F      NO3F        PC        PH 
##      2006      2088       609      2165       572       576      5942 
##        PN      PO4F        PP  SALINITY    SECCHI   SIGMA_T    SPCOND 
##       551       861       577      2331      1462      2286      6342 
## SSC_TOTAL      TALK       TDN       TDP      TKNW        TN        TP 
##       682      1681       536       577       618      1037       992 
##       TSS  TURB_NTU     WTEMP 
##      2376      3458      6311
    # BOD5W      CHLA       DIN        DO       DON       DOP   FCOLI_M  HARDNESS 
    #  1481      3216       691      6037       633       609      1522      2006 
    #  NH4F     NO23F      NO2F      NO3F        PC        PH        PN      PO4F 
    #  2088       609      2165       572       576      5942       551       861 
    #    PP  SALINITY    SECCHI   SIGMA_T    SPCOND SSC_TOTAL      TALK       TDN 
    #   577      2331      1462      2286      6342       682      1681       536 
    #   TDP      TKNW        TN        TP       TSS  TURB_NTU     WTEMP 
    #   577       618      1037       992      2376      3458      6311 

Some helpful documentation on translating parameter codes: http://www.chesapeakebay.net/documents/3676/cbwqdb2004_rb.pdf

x <- read_csv('data/water_qual_local.csv')

y <- x %>%
  group_by(Parameter) %>%
  filter(n() > 500) %>%
  ungroup()

params_to_keep <- c("WHOLE 5-DAY BIOCHEMICAL OXYGEN DEMAND MG/L", 
  "ACTIVE CHLOROPHYLL-A UG/L", 
  "DISSOLVED OXYGEN IN MG/L MG/L", 
  "TOTAL DISSOLVED NITROGEN MG/L", 
  "TOTAL DISSOLVED PHOSPHORUS MG/L", 
  "HARDNESS AS CACO3 MG/L", 
  "TOTAL ALKALINITY AS CACO3 MG/L", 
  "PH CORRECTED FOR TEMPERATURE (25 DEG C) SU", 
  "SALINITY UNITS IN PPT AND EQUAL TO PRACTICAL SALNITY UNITS (PSU) PPT", 
  "AMMONIUM NITROGEN AS N (FILTERED SAMPLE) MG/L", 
  "NITRITE+NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L", 
  "NITRITE NITROGEN AS N (FILTERED SAMPLE) MG/L", 
  "NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L", 
  "SECCHI DEPTH M", 
  "TOTAL SUSPENDED SOLIDS MG/L", 
  "TURBIDITY; NEPHELOMETRIC METHOD NTU", 
  "WATER TEMPERATURE DEG C")

param_df <- read_csv('data/param_lookup.txt', col_names = FALSE) %>%
  .$X1 %>%
  str_split_fixed(' ', 2) %>%
  as.data.frame() %>%
  setNames(c('param', 'param_desc')) %>%
  filter(param_desc %in% params_to_keep) %>%
  mutate(param_desc = factor(param_desc, ordered = TRUE))

z <- y %>%
  inner_join(param_df, by = c('Parameter' = 'param')) %>%
  select(FIPS, EventId, Program, Project, Station, SampleDate, 
         SampleTime, TotalDepth, Depth, Layer, 
         Parameter, MeasureValue, Unit, Details, 
         Latitude, Longitude, param_desc) %>%
  mutate(SampleDate = as.Date(SampleDate, format = '%m/%d/%Y')) %>%
  distinct() %>%
  left_join(fips_df, by = c('FIPS' = 'fips_id'))

write_csv(z, 'data/water_qual_student.csv')

1.1.2 Parameter list for Fairfax/Arlington/DC area

All the parameters in this area:

  • “WHOLE 5-DAY BIOCHEMICAL OXYGEN DEMAND MG/L”
    “ACTIVE CHLOROPHYLL-A UG/L”
    “DISSOLVED INORGANIC NITROGEN MG/L”
    “DISSOLVED OXYGEN IN MG/L MG/L”
    “DISSOLVED ORGANIC NITROGEN MG/L”
    “DISSOLVED ORGANIC PHOSPHORUS MG/L”
    “HARDNESS AS CACO3 MG/L”
    “AMMONIUM NITROGEN AS N (FILTERED SAMPLE) MG/L”
    “NITRITE+NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L”
    “NITRITE NITROGEN AS N (FILTERED SAMPLE) MG/L”
    “NITRATE NITROGEN AS N (FILTERED SAMPLE) MG/L”
    “PARTICULATE CARBON MG/L”
    “PH CORRECTED FOR TEMPERATURE (25 DEG C) SU”
    “SALINITY UNITS IN PPT AND EQUAL TO PRACTICAL SALNITY UNITS (PSU) PPT” “SECCHI DEPTH M”
    “TOTAL ALKALINITY AS CACO3 MG/L”
    “TOTAL DISSOLVED NITROGEN MG/L”
    “TOTAL DISSOLVED PHOSPHORUS MG/L”
    “TOTAL SUSPENDED SOLIDS MG/L”
    “TURBIDITY; NEPHELOMETRIC METHOD NTU”
    “WATER TEMPERATURE DEG C”

2 Plots of parameters vs time

z <- read_csv('data/water_qual_student.csv')

params <- z$param_desc %>% unique()

for (param in params) {
  ### param <- params[1]
  tmp <- z %>%
    filter(param_desc == param)
  
  plot_units <- tmp$Unit[!is.na(tmp$Unit)][1]
  plot_param_short  <- tmp$Parameter[1]
  
  param_plot <- ggplot(tmp, aes(x = SampleDate, y = MeasureValue, color = place_name)) +
    geom_point(alpha = .5) +
    labs(title = tools::toTitleCase(param),
         y = paste0(plot_param_short, ' (', plot_units, ')'),
         x = 'sample date')
  
  print(param_plot)
}